Data Cleaning

The filters applied to the parcel and crime data are:

# Get crime data from ArcGIS API and remove (4) entries with missing geometry.
# Write cleaned data to GeoJSON
# crime_dg <- st_read("https://utility.arcgis.com/usrsvcs/servers/0eaa6be357a74f5280157125e9b547fc/rest/services/OpenData_PublicSafety/FeatureServer/2/query?outFields=*&where=1%3D1&f=geojson")
# crime_dg$Date <- as.Date(fread(".//data//crime_dg.csv")$Date, "%Y/%m/%d")
# crime_dg <- crime_dg[!st_is_empty(crime_dg),]
# st_write(crime_dg, ".//data//crime_dg.geojson")

# # Read in the Hartford crime and parcel data
# crime_dg <- st_read(".//data//crime_dg.geojson")
# parcel_dg <- st_read(".//data//parcel_hartford.geojson")

# # Filter crime data to 2016-2021 and parcel data to single family homes
# crime_dg <- crime_dg |> 
#   subset(year(Date) %in% 2016:2021) |> 
#   st_transform(st_crs(hartford_tracts))
# parcel_dg <- parcel_dg |> 
#   subset(State_Use_Description == "ONE FAMILY") |> 
#   st_transform(st_crs(hartford_tracts))

Data Exploration

# Get the census tracts from Tigris for the map baselayer
hartford_census_data <- get_acs(
  geography = "tract", variable = c("B19013_001", "B01001_001"), 
  state = "CT", county = "Hartford", year = 2021
  ) |>
   dplyr::select(GEOID, variable, estimate) |>
   tidyr::pivot_wider(names_from = variable, values_from = estimate) |>
   dplyr::rename(population = "B01001_001", med_income = "B19013_001")

hartford_tracts <- st_filter(tracts(state = "CT"),
                             subset(county_subdivisions(state = "CT"), 
                                    NAMELSAD == "Hartford town"),
                             .predicate = st_within)
hartford_tracts <- merge(hartford_tracts, hartford_census_data, by = "GEOID")

water <- st_intersection(area_water("CT", "Hartford"), hartford_tracts)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries

Total Crime Count Heatmap

# Read in the Hartford crime and parcel data
# Load prepared crime data with kernel density estimates for thefts & violence
# See demo2 for details
crime_dg <- st_read(".//data//crime_hartford_2016_2021.geojson")
## Reading layer `crime_hartford_2016_2021' from data source 
##   `C:\Users\llint\OneDrive - Yale University\classes\625\CT Property\SDS625\data\crime_hartford_2016_2021.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 197060 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -72.71865 ymin: 41.72403 xmax: -72.65041 ymax: 41.80719
## Geodetic CRS:  WGS 84
parcel_dg <- st_read(".//data//parcel_hartford_single_family_crimestats.geojson")
## Reading layer `parcel_hartford_single_family_crimestats' from data source 
##   `C:\Users\llint\OneDrive - Yale University\classes\625\CT Property\SDS625\data\parcel_hartford_single_family_crimestats.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 7239 features and 51 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -72.71637 ymin: 41.72374 xmax: -72.65809 ymax: 41.80743
## Geodetic CRS:  WGS 84
# Plot
ggplot(crime_dg) +
   geom_sf(data = hartford_tracts) +
   geom_sf(data = water, color = "blue", linewidth = 0.35) +
   geom_hex(aes(X, Y, fill = log10(..count..)), 
            data = ~cbind(.x, st_coordinates(.x)), alpha = 0.75, bins = 50) +
   scale_fill_binned() +
   scale_x_continuous(guide = guide_axis(angle = 45)) +
   labs(x = "Latitude", y = "Longitude") +
   theme_bw()

Interactive Crime and Property Value Map

A sample of the data is plotted. Crime is in red, parcels are in black, and the density of crime is in blue. Additional census tract level information like median income and population are included in the tooltip. Interact with the map for details.

g <- ggplot(dplyr::sample_n(crime_dg, 1e3)) +
   geom_sf(data = hartford_tracts, aes(label1 = GEOID,
                                       label2 = med_income,
                                       label3 = population)) +
   geom_sf(data = dplyr::sample_n(parcel_dg, 1e3)) +
   geom_density_2d(aes(X,Y), data = ~cbind(.x, st_coordinates(.x))) +
   stat_sf_coordinates(size = 0.1, color = "red") +
   labs(x = "Latitude", y = "Longitude") +
   theme_bw() +
   theme(axis.text.x = element_text(angle = 45))

p <- toWebGL(ggplotly(g))
p$x$data[[4]]$hoverinfo <- "none"
p

Census Tract Level Aggregated Crime Rate and Average Property Value

The manually curated variables are:

  • Price: Assessed total value of the parcel
  • Thefts: Number of thefts (robberies, burglaries, larceny, theft, or stolen property) within 150 meters of the parcel
  • Violence: Number of violent crimes (assaults, homicides, shootings) within 150 meters of the parcel
  • Living_Area: Living area of the parcel
  • Effective_Area: Effective area of the parcel
  • Year: Approximate year the parcel was built
  • Bed: Number of bedrooms in the parcel
  • Bath: Number of bathrooms in the parcel

Violent crime rates are computed as the average number of violent crimes within 150 meters of a parcel in each tract. The average property value is computed as the mean assessed total value of the parcels in the tract.

# Subset the data and rename columns
parcel_dg %<>% 
  subset(select = c(
    "Assessed_Total", "Living_Area", "Effective_Area", "AYB",
    "Number_of_Bedroom", "Number_of_Baths", "Condition_Description",
    "Violence"))

parcel_dg %<>% 
  dplyr::rename(
    Price = "Assessed_Total", Year = "AYB", Bed = "Number_of_Bedroom",
    Bath = "Number_of_Baths", Condition = "Condition_Description")

parcel_dg$Condition %<>% 
  factor(levels = c("Dilapidated", "Very Poor", "Poor", "Fair", "Fair-Avg", 
                    "Average", "Avg-Good", "Good", "Good-VG", "Very Good", 
                    "Excellent"))

# Drop rows with NAs
paste0("Dropping n=", nrow(parcel_dg) - nrow(na.omit(parcel_dg)), 
       " rows with NAs.")
## [1] "Dropping n=19 rows with NAs."
X <- na.omit(parcel_dg)
# Assign each parcel to a census tract
X$GEOID <- hartford_tracts$GEOID[st_intersects(
  st_transform(X, crs = 26956), 
  st_transform(hartford_tracts, crs = 26956)) |> sapply(head, 1)]

# Compute rate of violent crimes per person in each tract
# and average property value in each tract
setDT(copy(X))[as.data.table(hartford_tracts),
               .(violence_rate = mean(Violence),
                 avg_property_value = mean(Price)),
               on = "GEOID", by = .EACHI]